* Start from an empty session and close any log left open by an earlier run.
clear all
capture log close

* Main data pathway globals.
global cleandata "S:\Project\DemoSos2\common\felles\JR_RG\DrVA\CleanData\"
global rawdata "S:\Project\DemoSos2\data2020\"

* Create the clean-data folder first (capture: no error if it already exists),
* then make it the working directory. Changing directory before creating the
* folder would stop the do-file whenever the folder does not exist yet.
capture mkdir "$cleandata"
cd "$cleandata"


**------------------------------------------------------------------------------*
*Determine first exogenous swap (reduce, terminate), date, new GP, and previous GP 
*Determine the characteristics that are needed
**------------------------------------------------------------------------------*
use FinalSet4.dta, clear

****** Yearly swap indicators (1 = a swap of that kind in the year, 0 otherwise).
* A missing source flag counts as "no swap" because only == 1 is tested.

* strict swaps: reduce-list, terminate-list, or reduce+terminate
forvalues yr = 2002/2020 {
	gen str_exog_swap`yr' = (red_swap`yr' == 1 | t_swap`yr' == 1 | red_term_swap`yr' == 1)
	label var str_exog_swap`yr' "Strict exogenous swap (reduce list, terminate list) - `yr'"
}

* any swap: strict swaps plus the a/o/w/other swap types
forvalues yr = 2002/2020 {
	gen dswap`yr' = (red_swap`yr' == 1 | t_swap`yr' == 1 | red_term_swap`yr' == 1 | a_swap`yr' == 1 | o_swap`yr' == 1 | w_swap`yr' == 1 | other_swap`yr' == 1)
	label var dswap`yr' "Any swap - `yr'"
}

* endogenous swap: a/o/w/other swap types only
* (the label used to read "Any swap", copied from the loop above)
forvalues yr = 2002/2020 {
	gen end_swap`yr' = (a_swap`yr' == 1 | o_swap`yr' == 1 | w_swap`yr' == 1 | other_swap`yr' == 1)
	label var end_swap`yr' "Endogenous swap - `yr'"
}


****** Total swaps: row sums of the yearly indicators, 2002-2020
* strict exogenous
egen total_str_exog_swap = rowtotal(str_exog_swap*)
label var total_str_exog_swap "Count; exogenous swap (reduce list or terminate list) - 2002-2020"

* all swaps
egen total_dswap = rowtotal(dswap*)
label var total_dswap "Total swaps - 2002-2020"

* endogenous
egen total_end_swap = rowtotal(end_swap*)
label var total_end_swap "Endogenous swap - 2002-2020"


****** Any swaps: 0/1 flags derived from the totals
* strict exogenous
gen any_str_exog_swap = (total_str_exog_swap>0 & total_str_exog_swap!=.)
label var any_str_exog_swap "Any exogenous swap (reduce list or terminate list) - 2002-2020"

* endogenous
gen any_end_swap = (total_end_swap>0 & total_end_swap!=.)
label var any_end_swap "Any endogenous swap - 2002-2020"

* endogenous, restricted to the last five years
gen any_end_swap1620 =  end_swap2016==1 | end_swap2017==1 | end_swap2018==1 | end_swap2019==1 | end_swap2020==1 
label var any_end_swap1620 "Any endogenous swap - 2016-2020"

* any swap of any type (built from total_dswap, so the label must not say
* "strict exogenous" as it did before)
gen any_swap = (total_dswap>0 & total_dswap!=.)
label var any_swap "Any swap - 2002-2020"


****** Year of the first swap of each kind
* The loop walks backwards from 2019 to 2002, so the earliest year with a swap
* is the last one written and therefore the value that remains.
* Swaps flagged in 2020 are not considered because the loop starts at 2019.
gen yr_str_exog_swap = .
gen str_problemswap = .
gen yr_swap = .
gen yr_end_swap = .

forvalues yr = 2019(-1)2002 {
	replace yr_str_exog_swap = `yr' if str_exog_swap`yr' == 1
	replace yr_swap          = `yr' if dswap`yr' == 1
	replace yr_end_swap      = `yr' if end_swap`yr' == 1
}

label var yr_str_exog_swap "Year of first strict exogenous swap"
label var yr_swap "Year of first  swap"
label var yr_end_swap "Year of first end swap"


****** Age at first swap = year of first swap minus birth year
* Left missing whenever either the swap year or the birth year is missing.

* strict exogenous
gen str_exog_age = yr_str_exog_swap - byr if yr_str_exog_swap != . & byr != .
label var str_exog_age "Age at first strict exogenous swap"

* any swap
gen swap_age = yr_swap - byr if yr_swap != . & byr != .
label var swap_age "Age at first swap"

* endogenous swap
gen end_swap_age = yr_end_swap - byr if yr_end_swap != . & byr != .
label var end_swap_age "Age at first end swap"

****** Municipality of first swaps
* The municipality is read from bokom in the year AFTER the swap year.
gen str_exog_muni = .
gen swap_muni = .

forvalues yr = 2002/2019 {
	local nxtyr = `yr' + 1
	* strict exogenous
	replace str_exog_muni = bokom`nxtyr' if yr_str_exog_swap == `yr'
	* any swap: match on the swap year yr_swap. The previous condition compared
	* swap_muni itself with the year, which is never true, so the variable
	* stayed missing for everyone.
	replace swap_muni = bokom`nxtyr' if yr_swap == `yr'
}

label var str_exog_muni "Municipality of first strict exogenous swap"
label var swap_muni "Municipality of first swap"


****** GPID of first swaps (cluster)
* The new GP is the gpid recorded in the year after the swap year.
gen str_exogGPIDnew = .
gen swap_GPIDnew = .
gen swap_end_GPIDnew = .

forvalues yr = 2002/2019 {
	local nxtyr = `yr' + 1
	replace str_exogGPIDnew  = gpid`nxtyr' if yr_str_exog_swap == `yr'
	replace swap_GPIDnew     = gpid`nxtyr' if yr_swap == `yr'
	replace swap_end_GPIDnew = gpid`nxtyr' if yr_end_swap == `yr'
}

* exogenous
format str_exogGPIDnew %12.0g
label var str_exogGPIDnew "GP ID of first strict exogenous swap"

* any
format swap_GPIDnew %12.0g
label var swap_GPIDnew "GP ID of first swap"

* endogenous
format swap_end_GPIDnew %12.0g
label var swap_end_GPIDnew "GP ID of first end swap"



****** Previous GPID of first swaps (FE)
* The previous GP is the gpid recorded in the swap year itself.
gen str_exogGPIDprev = .
gen swap_GPIDprev = .
gen swap_end_GPIDprev = .

forvalues yr = 2002/2019 {
	* first strict exogenous swap
	replace str_exogGPIDprev = gpid`yr' if yr_str_exog_swap == `yr'
	* first swap of any kind
	replace swap_GPIDprev = gpid`yr' if yr_swap == `yr'
	* first endogenous swap: keyed on yr_end_swap. It used to be keyed on
	* yr_swap, which made it an exact copy of swap_GPIDprev.
	replace swap_end_GPIDprev = gpid`yr' if yr_end_swap == `yr'
}

format str_exogGPIDprev %12.0g
label var str_exogGPIDprev "Previous GP ID of first exogenous swap"

format swap_GPIDprev %12.0g
label var swap_GPIDprev "Previous GP ID of first swap"

format swap_end_GPIDprev %12.0g
label var swap_end_GPIDprev "Previous GP ID of first end swap"


****** Previous GP age of first swaps
* age_gp is read in the swap year, i.e. the year used for the previous GP.
gen str_exog_drage_prev = .
forvalues yr = 2002/2019 {
	replace str_exog_drage_prev = age_gp`yr' if yr_str_exog_swap == `yr'
}
label var str_exog_drage_prev "Previous GP age"

* Smallest and largest of those ages within each previous-GP id
foreach stat in min max {
	bysort str_exogGPIDprev: egen `stat'_age_prev = `stat'(str_exog_drage_prev)
}
label var min_age_prev "Previous GP min age at exog swap"
label var max_age_prev "Previous GP max age at exog swap"

****** Age and sex of the new GP (values taken from the year after the swap)
gen dr_age_str_exog_swap = .
gen dr_age_swap = .
gen dr_male_str_exog_swap = .
gen dr_male_swap = .
gen dr_end_male_swap = .

forvalues yr = 2002/2019 {
	local nxtyr = `yr' + 1
	* GP age
	replace dr_age_str_exog_swap = age_gp`nxtyr' if yr_str_exog_swap == `yr'
	replace dr_age_swap = age_gp`nxtyr' if yr_swap == `yr'
	* GP sex
	replace dr_male_str_exog_swap = male_gp`nxtyr' if yr_str_exog_swap == `yr'
	replace dr_male_swap = male_gp`nxtyr' if yr_swap == `yr'
	replace dr_end_male_swap = male_gp`nxtyr' if yr_end_swap == `yr'
}

label var dr_age_str_exog_swap "Age of dr first strict exog swap"
label var dr_age_swap "Age of dr first swap"
label var dr_male_str_exog_swap "GP male of first exogenous swap"
label var dr_male_swap "GP male of first swap"
* This label used to be identical to the one on dr_male_swap.
label var dr_end_male_swap "GP male of first end swap"

****** Characteristics of the new GP's practice at the first swap
* Every value is read from the year after the swap year.
* Variables are created in the original order so the dataset layout is unchanged.
gen dr_spec_str_exog_swap = .
gen dr_spec_swap = .
gen dr_list_length_str_exog_swap = .
gen dr_list_length_swap = .
gen dr_share_str_exog_swap = .
gen dr_share_swap = .
gen dr_group_str_exog_swap = .
gen dr_group_swap = .
gen dr_max_swap = .
gen dr_max_str_exog_swap = .

forvalues yr = 2002/2019 {
	local nxtyr = `yr' + 1

	* first strict exogenous swap
	replace dr_spec_str_exog_swap        = spec_gp`nxtyr'       if yr_str_exog_swap == `yr'
	replace dr_list_length_str_exog_swap = list_gp`nxtyr'       if yr_str_exog_swap == `yr'
	replace dr_share_str_exog_swap       = fellesliste`nxtyr'   if yr_str_exog_swap == `yr'
	replace dr_group_str_exog_swap       = gruppepraksis`nxtyr' if yr_str_exog_swap == `yr'
	replace dr_max_str_exog_swap         = maxlist_gp`nxtyr'    if yr_str_exog_swap == `yr'

	* first swap of any kind
	replace dr_spec_swap        = spec_gp`nxtyr'       if yr_swap == `yr'
	replace dr_list_length_swap = list_gp`nxtyr'       if yr_swap == `yr'
	replace dr_share_swap       = fellesliste`nxtyr'   if yr_swap == `yr'
	replace dr_group_swap       = gruppepraksis`nxtyr' if yr_swap == `yr'
	replace dr_max_swap         = maxlist_gp`nxtyr'    if yr_swap == `yr'
}

* GP specialization
label var dr_spec_str_exog_swap "GP specialist of first strict exogenous swap"
label var dr_spec_swap "GP specialist of first swap"

* GP list length and shared list
label var dr_list_length_str_exog_swap "GP list length of first strict exogenous swap"
label var dr_list_length_swap "GP list length of first swap"
label var dr_share_str_exog_swap "GP list shared of first strict exogenous swap"
label var dr_share_swap "GP list shared of first swap"

* GP group office
label var dr_group_str_exog_swap "GP group office of first strict exogenous swap"
label var dr_group_swap "GP group office of first swap"

* GP max list
label var dr_max_swap "GP max list of first swap"
label var dr_max_str_exog_swap "GP max list of first strict exogenous swap"


* Referral measures in the year before the first swap (swap years 2007-2015)
*
* NOTE(review): the zero-fill used to be guarded by "prevyr >= 2006", which is
* true in every iteration of a 2007/2015 loop. The first pass therefore set
* the variable to 0 for everybody, including people with no swap or with a
* swap outside 2007-2015. The fill is now limited to people whose swap year is
* the year being processed; everyone else stays missing. Confirm that no
* downstream code relied on the old all-zero fill.

* relative to the first strict exogenous swap
foreach var in ref_time nb_ref_short {
	ge str_exog`var' = .
	forvalues yr = 2007/2015 {
		local prevyr = `yr' - 1
		replace str_exog`var' = `var'`prevyr' if yr_str_exog_swap == `yr'
		replace str_exog`var' = 0 if yr_str_exog_swap == `yr' & str_exog`var' == .
	}
	label var str_exog`var' "Total `var' year before strict exg swap"
}

* relative to the first swap of any kind
foreach var in ref_time nb_ref_short {
	ge swap_`var' = .
	forvalues yr = 2007/2015 {
		local prevyr = `yr' - 1
		replace swap_`var' = `var'`prevyr' if yr_swap == `yr'
		replace swap_`var' = 0 if yr_swap == `yr' & swap_`var' == .
	}
	label var swap_`var' "Total `var' year before swap"
}


* Remove every variable whose name ends in 2020.
drop *2020

* Keep the id, the swap/GP variables built above, and the yearly municipality
* and GP id columns. The wildcards str_exog* and *swap* already cover the
* individually named swap variables of the earlier list, so the kept set is
* the same.
keep lopenr str_exog* *swap* bokom20* gpid20* min_age_prev max_age_prev

quietly compress
save swap_and_GP_rev3.dta, replace


**------------------------------------------------------------------------------*
*Determine patient's parents baseline characteristics 
**------------------------------------------------------------------------------*
use FinalSet4.dta, clear

****** Parental age at the patient's birth (patient birth year - parent birth year)
foreach p in dad mom {
	gen `p'_age_birth = byr - byr_`p' if byr_`p' != . & byr != .
}
label var dad_age_birth "Dad age at birth"
label var mom_age_birth "Mom age at birth"

****** Parental education in the patient's birth year (birth years 1992-2014)
* bu_<parent><year> is compared against 100000-wide bands:
*   <HS flag : 1 below 300000, 0 from 300000 up to (not including) 900000
*   years    : 0 below 100000, then 7/10/13/14/15/16/17.5/21.5 for each
*              successive band up to (not including) 900000
foreach p in dad mom {
	gen `p'_edu_LessHS = .
	gen `p'_edu_yrs = .
	forvalues yr = 1992/2014 {
		replace `p'_edu_LessHS = 1 if byr == `yr' & bu_`p'`yr' < 300000
		replace `p'_edu_LessHS = 0 if byr == `yr' & bu_`p'`yr' >= 300000 & bu_`p'`yr' < 900000

		replace `p'_edu_yrs = 0 if byr == `yr' & bu_`p'`yr' < 100000
		local lo = 100000
		foreach v in 7 10 13 14 15 16 17.5 21.5 {
			local hi = `lo' + 100000
			replace `p'_edu_yrs = `v' if byr == `yr' & bu_`p'`yr' >= `lo' & bu_`p'`yr' < `hi'
			local lo = `hi'
		}
	}
}
label var dad_edu_LessHS "Dad <HS at birth"
label var dad_edu_yrs "Dad years of education at birth"
label var mom_edu_LessHS "Mom <HS at birth"
label var mom_edu_yrs "Mom years of education at birth"

****** Mother married in the patient's birth year (civ_mom code 2), 1975-2014
gen mom_married = .
forvalues yr = 1975/2014 {
	replace mom_married = 1 if byr == `yr' & civ_mom`yr' == 2
	replace mom_married = 0 if byr == `yr' & civ_mom`yr' != 2 & civ_mom`yr' != .
}
label var mom_married "Mom married at birth"

keep lopenr sex NORborn birthorder siblings dad_age_birth mom_age_birth dad_edu_LessHS dad_edu_yrs mom_edu_LessHS mom_edu_yrs mom_married byr
save patientparent_char_rev3.dta, replace



**------------------------------------------------------------------------------*
*Determine patient's own baseline characteristics (exog swap)
**------------------------------------------------------------------------------*
* --- Hospital (npr) measures, suffixed _base ---
use lopenr year npr_days npr npr_in npr_out npr_er npr_noer charlindex using npr, clear
foreach v in npr_days npr npr_in npr_out npr_er npr_noer charlindex {
	rename `v' `v'_base
}
tempfile nprbase
save `nprbase'


* --- Primary-care (khur) measures, suffixed _base ---
use "khurh_annual", clear
keep total simple_contact major_surg life_style_adv reimburse cancer digest_chronic heart_chronic endoc_chronic any_cardiogp labtest procedure sl blood_test lopenr year
* Keep only years that have a CPI value, then rescale reimbursements by cpi/100
merge m:1 year using cpi, keep(match) nogenerate
ge reimburse_def = reimburse*cpi/100
drop cpi reimburse
rename reimburse_def reimburse
foreach v in cancer digest_chronic heart_chronic endoc_chronic any_cardiogp total simple_contact major_surg life_style_adv reimburse labtest procedure sl blood_test {
	rename `v' `v'_base
}
tempfile khurbase
save `khurbase'

* --- Referrals: flag diff in the interval (0, 4] ---
use "referral_ind", clear
ge nb_ref_short_base = (diff <= 4 & diff > 0)
drop diff
tempfile ref
save `ref'
	
					
* One row per person with a first strict exogenous swap; all baseline values
* below are taken two years before that swap.
use swap_and_GP_rev3.dta, clear
keep lopenr yr_str_exog_swap  //keep id and year of first exog swap
keep if yr_str_exog_swap!=.
duplicates drop

***Education
gen aar=yr_str_exog_swap-2
merge 1:1 lopenr aar using "Uutdanning.dta", keepusing(BU) keep(1 3) nogen
*keeps if year is two years before the swap
keep if aar==yr_str_exog_swap-2
destring BU, replace

*edu_yrs: years of schooling implied by the 100000-wide band of BU
gen edu_yrs=.
replace edu_yrs = 0 if BU < 100000
replace edu_yrs = 7 if BU >= 100000 & BU < 200000
replace edu_yrs = 10 if BU >= 200000 & BU < 300000
replace edu_yrs = 13 if BU >= 300000 & BU < 400000
replace edu_yrs = 14 if BU >= 400000 & BU < 500000
replace edu_yrs = 15 if BU >= 500000 & BU < 600000
replace edu_yrs = 16 if BU >= 600000 & BU < 700000
replace edu_yrs = 17.5 if BU >= 700000 & BU < 800000
replace edu_yrs = 21.5 if BU >= 800000 & BU < 900000
label var edu_yrs "Own years of education - 2 years before swap"

* Leading digit of the six-digit code, taken from the numeric value.
* Reading it from the first character after destring/tostring drops leading
* zeros, so a code below 100000 would have reported its second digit instead
* of 0. This uses the same 100000-wide bands as edu_yrs above.
gen bu_1 = floor(BU/100000)
* BU is still stored as a string in the saved file, as before.
tostring BU, replace


***Income, sick leave, hospitalizations
ge year=yr_str_exog_swap-2

merge 1:1 lopenr using kids, keep(1 3) nogen
merge 1:1 lopenr year using income_datanew, keep(1 3) nogen keepusing(woverfor wskpl_overf folketrygd uforetrygd arbledtrygd aap sykepenger wskfrie_overf sos_stonad tot_income lab_income dself_inc insoc unempben daap innav di)
merge 1:1 lopenr year using hours, keep(1 3) nogen keepusing(hours)
merge 1:1 lopenr year using `nprbase', keep(1 3) nogen
merge 1:1 lopenr year using `khurbase', keep(1 3) nogen
merge 1:1 lopenr year using `ref', keep(1 3) nogen
merge 1:1 lopenr year using sick_leave, keep(1 3) nogen keepusing(total_days_sl)

ge dwoverfor = woverfor >0 if woverfor!=.	


rename total_days_sl total_days_sl_base
*npr available from 2008 only, i.e. for swaps from 2010 on (baseline = swap year - 2)
foreach var in npr_days npr npr_in npr_out npr_er npr_noer charlindex  {
	replace `var'_base = 0 if `var'_base==.  & yr_str_exog_swap>=2010 
	ge d`var'_base = `var'_base> 0  if yr_str_exog_swap>=2010
}

* khur only from 2006, i.e. for swaps from 2008 on
foreach var in cancer digest_chronic heart_chronic endoc_chronic any_cardiogp total simple_contact reimburse labtest procedure sl life_style_adv blood_test nb_ref_short {
	replace `var'_base = 0 if `var'_base==.  & yr_str_exog_swap>=2008
	ge d`var'_base = `var'_base> 0 if yr_str_exog_swap>=2008
}

label var sykepenger 		"Sick Leave Benefits"
label var wskfrie_overf 	"Tax free transfers"
label var wskpl_overf 		"Taxable transfers"
label var di				"DI"
label var woverfor			"Welfare benefits - 2 years before swap"
label var dwoverfor			"Any welfare benefits - 2 years before swap"
label var lab_income 		"Emp income - 2 years before swap"
label var tot_income 		"Total inc 2 years before swap"
label var hours 			"Weekly hours 2 years before swap"
label var npr_days_base 	"Hospital days - 2 years before swap"
label var npr_base 			"Hospital stays - 2 years before swap"
label var npr_in_base 		"Hospital inp stays - 2 years before swap"
label var npr_out_base		"Hospital outp stays - 2 years before swap"
label var npr_er_base		"Hospital ER stays - 2 years before swap"
label var npr_noer_base		"Hospital Non-ER stays - 2 years before swap"
* charlindex_base used to carry the "Hospital days" label of npr_days_base.
* NOTE(review): "Charlson index" is inferred from the variable name - confirm.
label var charlindex_base	"Charlson index (charlindex) - 2 years before swap"
* The d* variables are 0/1 "any" flags, so they get their own labels instead
* of repeating the labels of the underlying counts.
label var dnpr_days_base	"Any hospital days - 2 years before swap"
label var dnpr_base			"Any hospital stays - 2 years before swap"
label var dnpr_in_base 		"Any hospital inp stays - 2 years before swap"
label var dnpr_out_base		"Any hospital outp stays - 2 years before swap"
label var dnpr_er_base 		"Any hospital ER stays - 2 years before swap"
label var dnpr_noer_base 	"Any hospital Non-ER stays - 2 years before swap"
* The variable was renamed above; refer to it by its full name rather than
* relying on variable-name abbreviation.
label var total_days_sl_base	"Sick Leave Days - 2 years before swap"


***Married
rename aar _aar
ge aar=yr_str_exog_swap-2
merge 1:1 lopenr aar using "sivilstand.dta", keep(1 3) nogen
keep if aar==yr_str_exog_swap-2
ge married_base= sivilstand==2|sivilstand==6 if sivilstand!=.
label var married_base "Married/Partnership 2 years before swap"

drop aar  _aar fodselsaar_mnd_barn_01
save patientown_char_rev3.dta, replace



**------------------------------------------------------------------------------*
*Determine patient's own baseline characteristics (any swap)
**------------------------------------------------------------------------------*
* --- Hospital stays (npr), suffixed _any ---
use lopenr year npr using npr, clear
rename npr npr_any
tempfile nprany
save `nprany'


* --- Primary-care (khur) measures, suffixed _any ---
use "khurh_annual", clear
keep total simple_contact major_surg life_style_adv reimburse cancer digest_chronic heart_chronic endoc_chronic any_cardiogp labtest procedure sl lopenr year
* Keep only years that have a CPI value, then rescale reimbursements by cpi/100
merge m:1 year using cpi, keep(match) nogenerate
ge reimburse_def = reimburse*cpi/100
drop cpi reimburse
rename reimburse_def reimburse
foreach v in cancer digest_chronic heart_chronic endoc_chronic any_cardiogp total simple_contact major_surg life_style_adv reimburse labtest procedure sl {
	rename `v' `v'_any
}
tempfile khurany
save `khurany'

					
* One row per person with a first swap of any kind; all baseline values below
* are taken two years before that swap.
use swap_and_GP_rev3.dta, clear
keep lopenr yr_swap  //keep id and year of first swap
keep if yr_swap!=.
duplicates drop

***Education
gen aar=yr_swap-2 
merge 1:1 lopenr aar using "Uutdanning.dta", keepusing(BU) keep(1 3) nogen
*keeps if year is two years before the swap
keep if aar==yr_swap-2
destring BU, replace

* Leading digit of the six-digit code, taken from the numeric value.
* Reading the first character after destring/tostring drops leading zeros,
* so a code below 100000 would have reported its second digit instead of 0.
gen bu_1_any = floor(BU/100000)
drop BU aar

***Income, sick leave, hospitalizations
ge year=yr_swap-2
merge 1:1 lopenr using kids, keep(1 3) nogen
merge 1:1 lopenr year using income_datanew, keep(1 3) nogen keepusing(tot_income lab_income woverfor)
merge 1:1 lopenr year using `nprany', keep(1 3) nogen
merge 1:1 lopenr year using `khurany', keep(1 3) nogen
merge 1:1 lopenr year using sick_leave, keep(1 3) nogen keepusing(total_days_sl)

ge dwoverfor_any = woverfor >0 if woverfor!=.	
drop woverfor
rename tot_income tot_income_any
rename lab_income lab_income_any
rename total_days_sl total_days_sl_any
* No sick-leave record but positive labour income: count as zero days
replace total_days_sl_any = 0 if total_days_sl_any ==. & lab_income_any>0 & lab_income_any!=.

*npr available from 2008 only, i.e. for swaps from 2010 on (baseline = swap year - 2)
foreach var in npr   {
	replace `var'_any = 0 if `var'_any==.  & yr_swap>=2010  
	ge d`var'_any = `var'_any> 0  if yr_swap>=2010
}

* khur only from 2006, i.e. for swaps from 2008 on
foreach var in cancer digest_chronic heart_chronic endoc_chronic any_cardiogp total reimburse labtest procedure sl  {
	replace `var'_any = 0 if `var'_any==.  & yr_swap>=2008
	ge d`var'_any = `var'_any> 0 if yr_swap>=2008 
}


label var dwoverfor_any		"Any welfare benefits - 2 years before swap"
label var lab_income_any 	"Emp income - 2 years before swap"
label var tot_income_any 	"Total inc 2 years before swap"
label var npr_any 			"Hospital stays - 2 years before swap"
* dnpr_any is the 0/1 flag; it used to share the label of the count npr_any.
label var dnpr_any			"Any hospital stays - 2 years before swap"
label var total_days_sl_any	"Sick Leave Days - 2 years before swap"

***Married
ge aar=yr_swap-2
merge 1:1 lopenr aar using "sivilstand.dta", keep(1 3) nogen
keep if aar==yr_swap-2
ge married_any= sivilstand==2|sivilstand==6 if sivilstand!=.
label var married_any "Married/Partnership 2 years before swap"

rename antbarn antbarn_any

drop aar fodselsaar_mnd_barn_01 yr_birth1 year
save patientown_char_any_rev3.dta, replace


**------------------------------------------------------------------------------*
*** VARIABLES FOR OUTCOMES AT UP TO 5 YEARS ***
**------------------------------------------------------------------------------*
**------------------------------------------------------------------------------*
*** Referrals & hospitalizations ***
**------------------------------------------------------------------------------*
* Write five copies of referral_ind (referral_ind1 ... referral_ind5); in copy k
* the columns year and diff are renamed year<k> and ref<k>.
forvalues num = 1/5 {
	use referral_ind, clear
	rename diff ref`num'
	rename year year`num'
	save referral_ind`num', replace
}


* Write five suffixed copies of the annual and the ER primary-care files
* (khur1 ... khur5 and khur_er1 ... khur_er5). Every variable except lopenr
* gets the copy number appended to its name.
forvalues num = 1/5 {

	* --- annual file ---
	use total simple_contact consult sick_visit procedure blood_test labtest small_procedure large_procedure appointment reimburse sl sl_p cancer digest_chronic heart_chronic endoc_chronic diag_L diag_K diag_D diag_P diag_test prescription referral life_style_adv lopenr year using khurh_annual, clear

	* keep years with a CPI value and rescale reimbursements by cpi/100
	merge m:1 year using cpi, keep(match) nogenerate
	ge reimburse_def = reimburse*cpi/100
	drop cpi reimburse

	unab allvars : _all
	foreach v of local allvars {
		rename `v' `v'`num'
	}
	rename lopenr`num' lopenr
	save khur`num', replace

	* --- ER file ---
	use khurh_er, clear
	merge m:1 year using cpi, keep(match) nogenerate
	ge reimburse_er = reimburse*cpi/100
	drop cpi reimburse

	unab allvars : _all
	foreach v of local allvars {
		rename `v' `v'`num'
	}
	rename lopenr`num' lopenr
	save khur_er`num', replace
}


* Write five suffixed copies of the hospital file (npr1_rev ... npr5_rev).
* Accident and injury stays are combined into npr_ac_inj first; every variable
* except lopenr then gets the copy number appended to its name.
foreach num in 1 2 3 4 5 {	
	use npr, clear
	* rowtotal() is the current name of the egen function formerly called
	* rsum(); it is also what the rest of this do-file uses.
	egen npr_ac_inj = rowtotal(npr_accident npr_injuries)
	
	drop npr_accident  npr_injuries
	foreach var of varlist _all {
		rename `var' `var'`num'
	}
	rename lopenr`num'	lopenr
	save npr`num'_rev, replace
		
}


* diag_A by person and year, stored with the suffix 1 (khur1_screening)
use diag_A lopenr year using khurh_annual, clear
rename year year1
rename diag_A diag_A1

save khur1_screening, replace
	

* Yearly counts of symptom-type ICPC-2 diagnoses per person, 2006-2018.
* Each flag is 1 when the diagnose string contains one of the listed codes and
* is only defined on rows with TypeDiag == "ICPC-2" (plus a gender restriction
* for the sex-specific groups). The regular expressions list exactly the codes
* that were previously spelled out one regexm() call at a time.
foreach i of numlist 2006/2018 {
	use  "kurh_`i'.dta", clear
	
	ge pap_test =  regexm(diagnose, "A981") if TypeDiag == "ICPC-2" & gender == 2
	
	* A01-A11, A21, A23, A26-A29, A79, A88, A98
	ge general_sy = regexm(diagnose, "A(0[1-9]|1[01]|2[136789]|79|88|98)") if TypeDiag == "ICPC-2"
	
	* D01-D21, D23-D29
	ge dig_sy = regexm(diagnose, "D(0[1-9]|1[0-9]|2[01]|2[3-9])") if TypeDiag == "ICPC-2"
			
	* R01-R09, R21, R23-R29
	ge resp_sy = regexm(diagnose, "R(0[1-9]|21|2[3-9])") if TypeDiag == "ICPC-2"
			
	* S02, S06, S07, S26, S29, S97
	ge skin_sy = regexm(diagnose, "S(02|06|07|26|29|97)") if TypeDiag == "ICPC-2"
			
	* X12, X18, X19, X25, X26, X86 (gender == 2 only)
	ge fem_sy = regexm(diagnose, "X(12|18|19|25|26|86)") if TypeDiag == "ICPC-2" & gender == 2
	
	* Y02, Y05, Y06, Y26 (gender == 1 only)
	ge male_sy = regexm(diagnose, "Y(02|05|06|26)") if TypeDiag == "ICPC-2" & gender == 1
			
	* skin_sy was generated above but missing from this list, so collapse
	* silently discarded it; it is now summed like the other groups.
	collapse (sum) general_sy dig_sy resp_sy skin_sy fem_sy male_sy pap_test, by(lopenr year)
	save "khur_screen`i'", replace
}

* Stack the yearly files into one
use "khur_screen2006", clear
forvalues i = 2007/2018 {
	append using "khur_screen`i'"
}
rename year year1
save "khur_screen", replace




**------------------------------------------------------------------------------*
*** Mortality ***
**------------------------------------------------------------------------------*
* Cause-of-death file: one row per person with cause-group flags.
use "S:\Project\DemoSos2\data2020\DAR\DAR.dta", clear
duplicates drop
drop if dodsaar==2020

* Keep a single record per person
bysort lopenr: gen x=_n
count //  1,686,564
count if x>1 // 2
drop if x>1
drop x

rename dodsaar yr_death

* Split the underlying-cause code into its letter (1st character) and the two
* characters that follow it; only for rows coded with kodeverk "10".
ge idc10_1  = substr(diagnose_underliggende_k,1,1) if type_diagnose_kodeverk_k== "10"
ge idc10_2  = substr(diagnose_underliggende_k,2,2) if type_diagnose_kodeverk_k== "10"
destring idc10_2, replace

* Cause groups. Every range test has an upper bound, so a missing idc10_2
* (which sorts above all numbers) never satisfies it.
ge inf =  idc10_1=="A" | idc10_1=="B"  						if type_diagnose_kodeverk_k== "10"
ge cancer =  idc10_1=="C" | idc10_1 =="D" 					if type_diagnose_kodeverk_k== "10"
ge cancer_c =  idc10_1=="C"									if type_diagnose_kodeverk_k== "10"
ge cancer_d =  idc10_1=="D"									if type_diagnose_kodeverk_k== "10"
ge mental =  idc10_1=="F" 									if type_diagnose_kodeverk_k== "10"
ge heart =  idc10_1=="I" 									if type_diagnose_kodeverk_k== "10"
ge o_heart =  idc10_1=="R" & (idc10_2>=0 &  idc10_2<=9)		if type_diagnose_kodeverk_k== "10"
ge resp =  idc10_1=="J" 									if type_diagnose_kodeverk_k== "10"
ge digest =  idc10_1=="K" 									if type_diagnose_kodeverk_k== "10"
ge ext =   idc10_1=="V" | idc10_1=="W"  | idc10_1=="X" | idc10_1=="Y" | ( idc10_1=="U" & idc10_2>=01 & idc10_2<=03) 						if type_diagnose_kodeverk_k== "10"
ge accident = idc10_1=="V" | idc10_1=="W" | ( idc10_1=="X" & (idc10_2>=1 &  idc10_2<=59)) | (idc10_1=="Y" & (idc10_2>=85 &  idc10_2<=86)) 	if type_diagnose_kodeverk_k== "10"
ge suicide = (idc10_1=="X" & (idc10_2>=60 &  idc10_2<=84)) | (idc10_1=="Y" & idc10_2==87) | (idc10_1=="U" & idc10_2==03) 						if type_diagnose_kodeverk_k== "10"
* homicide: X85-X99, Y00-Y09, U01, U02.
* The U01/U02 tests used to sit inside the parenthesis that is AND-ed with
* idc10_1=="Y", so they could never be true; each letter now has its own term.
ge homicide = (idc10_1=="X" & (idc10_2>=85 &  idc10_2<=99)) | (idc10_1=="Y" & (idc10_2>=0 & idc10_2<=9)) | (idc10_1=="U" & (idc10_2==01 | idc10_2==02)) if type_diagnose_kodeverk_k== "10"

save mortality_rev.dta, replace


**------------------------------------------------------------------------------*
*** health of children ***
**------------------------------------------------------------------------------*
* Birth outcomes of each mother's first birth, singletons only.
use "S:\Project\DemoSos2\data2020\MFR\MFR.dta", clear
drop if lopenummer_mor==""
keep lopenummer_mor svlen spabort_12_5 spabort_23_5  vekt apgar1 apgar5 lengde hode fdato
duplicates drop

ge year_baby = year(fdato)
drop fdato

* The two spabort variables are only used by the duplicates check above.
* The recoding that used to precede this drop had no effect and was removed.
drop spabort_12_5 spabort_23_5
 

rename vekt 	bweight
rename lengde 	lenght
rename hode		head_cir
rename svlen gestweeks

* Apgar score equal to 10 (missing when the score is missing)
foreach v in apgar1 apgar5 {
	ge `v'_10 = `v'==10 if `v'!=.
}
* Low birth weight. Without the guard a missing weight was coded 0, because a
* missing value is never below 2500.
ge lweight = bweight < 2500 if bweight != .

rename lopenummer_mor lopenr  
** start with first births & singletons
* Step 1: keep all records from the mother's earliest birth year.
* Step 2: keep the mother only if that year has exactly one record.
* The old order kept one row per mother first, so the singleton test was
* always true and multiple births at the first delivery were retained.
bys lopenr (year_baby): keep if year_baby == year_baby[1]
bys lopenr: keep if _N == 1
compress
save "births_rev.dta", replace



**------------------------------------------------------------------------------*
*** Income/SL as outcome 
**------------------------------------------------------------------------------*

* Write suffixed copies (2-5) of the income, hours and sick-leave files; every
* outcome and the year variable get the copy number appended to their names.
foreach num in 2 3 4 5 {	
	use if year > 1998 using income_datanew, clear
	rename year year`num'
	* Use the renamed variable explicitly; plain "year" only worked through
	* variable-name abbreviation and would fail if it became ambiguous.
	drop if year`num'==2020
	keep lab_income tot_income insoc unempben innav di lab_incG innav dself_inc year`num' sos_stonad aap arbled uforetrygd folketrygd woverfor wskpl_overf wskfrie_overf lopenr	
	for var lab_income tot_income insoc unempben innav di lab_incG dself_inc sos_stonad aap arbled uforetrygd folketrygd woverfor wskpl_overf wskfrie_overf: rename X X`num'
	* Cap innav at 1 (same reasoning: refer to the suffixed name, not "innav")
	replace innav`num' = 1 if innav`num'> 1 & innav`num'!=.

	
	ge work`num' = lab_incG`num'> 1 & lab_incG`num'!=.
	ge workv2_`num' = lab_incG`num'> 2 & lab_incG`num'!=.
	
	save income_datanew_rev`num', replace

	
	use hours, clear
	keep morethan3750 m_morethan35 m_morethan37 m_morethan40 m_morethan3750 partime fulltime hours year lopenr
	lab var partime 				"Agreed Parttime"
	lab var fulltime 				"Agreed Fulltime"
	lab var hours					"Agreed average weekly hours worked"
	for var morethan3750 m_morethan35 m_morethan37 m_morethan40 m_morethan3750 partime fulltime hours: rename X X`num'
	rename year year`num'
	save hours_rev`num', replace
	
	use sick_leave, clear
	rename year year`num'
	keep lopenr total_days_sl diag_F year`num'
	rename total_days_sl sl_days`num'
	rename diag_F 		 diag_F`num'
	save sl_rev`num', replace
}


* Suffixed copy of the extra welfare indicators (currently only copy 5).
foreach num in 5 {	
	use if year > 1998 using welfare_extra, clear
	rename year year`num'
	* Refer to the renamed year variable explicitly rather than through
	* variable-name abbreviation.
	drop if year`num'==2020
	for var dtjenestepensjon_afp dbarnetrygd dbostotte dstudiestipend dforsorgerfradrag dgrunn_hjelp  dkontantstotte dsykepenger: rename X X`num'
	* dsykepenger is blanked for 2005 and earlier. The loop macro replaces
	* the hard-coded 5 so the line stays correct if more copies are added.
	replace dsykepenger`num' = . if year`num' <= 2005
	save welfare_rev`num', replace
}


**------------------------------------------------------------------------------*
* identify if an indiv has kids
**------------------------------------------------------------------------------*
* Number of children and birth year of the first child, per person.
use "$rawdata\Befolkn\Familie\slekt.dta", clear
keep lopenr antbarn fodselsaar_mnd_barn_01
drop if lopenr == "" 
* First four characters of the first child's birth year-month string
gen yr_birth1 = substr(fodselsaar_mnd_barn_01,1,4)
destring yr_birth1, replace
* Treat a missing child count as zero BEFORE building the flag. Previously
* the flag was built first, and a missing count (which compares as larger
* than any number) was flagged as having kids.
replace antbarn=0 if antbarn==.
* "Any kids" means at least one child; the old test antbarn>1 needed two.
ge any_kids = antbarn>0
save kids_rev, replace


**------------------------------------------------------------------------------*
* cancer, cancer screening (pre & 2 years after)
* Builds temp_precancer.dta: one row per lopenr-year with a c_<site>_base
* indicator for each cancer site.
**------------------------------------------------------------------------------*
use CancerDatabase.dta, clear
merge 1:1 lopenr year using CancerDatabaseHosp, nogen
drop hospital

local sites blood digest musculoskeletal ear_resp eye_neuro skin gland uro_reproduct other

* A site is flagged when any of its three source indicators (icpc, ICD, hosp)
* equals 1. The source indicators are dropped once combined.
foreach site of local sites {
	gen c_`site' = (cancer_`site'_icpc == 1 | cancer_`site'_ICD == 1 | cancer_`site'_hosp == 1)
	drop cancer_`site'_icpc cancer_`site'_ICD cancer_`site'_hosp
}

* the cardio ICPC flag has no site of its own and is folded into "other"
replace c_other = 1 if cancer_cardio_icpc == 1 & c_other==0
drop cancer_cardio_icpc

* suffix every site indicator with _base
foreach site of local sites {
	rename c_`site' c_`site'_base
}
save temp_precancer, replace		

** to use as outcome
* Same content as temp_precancer, but keyed on year1 and with the site
* indicators named c_<site>1 so they can be merged on lopenr year1.
use temp_precancer, clear
rename year year1		
foreach site in blood digest musculoskeletal ear_resp eye_neuro skin gland uro_reproduct other {
	rename c_`site'_base c_`site'1 
}
save temp_cancer1, replace			



** get baseline cancer type
* One row per lopenr / first strict exogenous swap year, with the cancer site
* indicators observed two years before that swap.
use lopenr yr_str_exog_swap if yr_str_exog_swap!=. using swap_and_GP_rev3.dta, clear
duplicates drop

* look-up year = two years before the swap
gen year = yr_str_exog_swap - 2
merge 1:1 lopenr year using temp_precancer, keep(1 3) nogen

* For swaps from 2008 onwards, a missing site indicator is set to 0 and a 0/1
* dummy d<var>_base is created. Earlier swaps are left missing.
foreach site in blood digest musculoskeletal ear_resp eye_neuro skin gland uro_reproduct other {
	local v c_`site'_base
	replace `v' = 0 if `v'==. & yr_str_exog_swap>=2008
	gen d`v' = (`v' > 0) if yr_str_exog_swap>=2008
}

* keep rows whose year is two years before the swap (true for every row here,
* since year was constructed that way above)
keep if year == yr_str_exog_swap - 2
save patientown_precancer.dta, replace

* the intermediate file is no longer needed (temp_cancer1 was saved separately)
erase temp_precancer.dta


**------------------------------------------------------------------------------*
*** *Merge data **
**------------------------------------------------------------------------------*
* Start from the swap file and attach the person-level files on lopenr.
* Unmatched rows from the using files are discarded (keep(1 3)).
use swap_and_GP_rev3.dta, clear
foreach src in mortality_rev patientparent_char_rev3 patientown_char_rev3 births_rev kids_rev {
	merge 1:1 lopenr using "`src'.dta", keep(1 3) nogen
}

* calendar year k years after the first strict exogenous swap, k = 1..5
forvalues k = 1/5 {
	gen year`k' = yr_str_exog_swap + `k'
}
quietly compress

* recode gender of individual (sex is a string; "1" is coded as male, empty stays missing)
gen male = (sex == "1") if sex != ""
drop sex

* age of death: year of birth is backed out from age at the swap
gen yob = yr_str_exog_swap - str_exog_age
gen age_death = yr_death - yob

* other individual characteristics
* bigcity: municipality code at the swap is one of the five listed codes
gen bigcity = inlist(str_exog_muni, 301, 1201, 1601, 1103, 1001)
* education dummies are left missing when bu_1 is missing or 9
gen hs0 = inlist(bu_1, 1, 2) if !(bu_1 == . | bu_1 == 9)
gen college0 = inlist(bu_1, 6, 7, 8) if !(bu_1 == . | bu_1 == 9)
label variable hs0 "Own Educ<HS 2 yrs before swap"
label variable college0 "Some college 2 yrs before swap"

* merge in hospital, sick leave day and income
* Each file is keyed on lopenr plus the matching yearK variable. Rows found
* only in the using file are discarded.

* hospital files, years 1-5 after the swap
forvalues k = 1/5 {
	merge 1:1 lopenr year`k' using npr`k'_rev, keep(1 3) nogen
}

* khur files, years 1-5
forvalues k = 1/5 {
	merge 1:1 lopenr year`k' using khur`k', keep(1 3) nogen
}

* khur_er files, years 1-5
forvalues k = 1/5 {
	merge 1:1 lopenr year`k' using khur_er`k', keep(1 3) nogen
}

* sick leave, income and hours are merged for years 2-5 only
foreach src in sl_rev income_datanew_rev hours_rev {
	forvalues k = 2/5 {
		merge 1:1 lopenr year`k' using `src'`k', keep(1 3) nogen
	}
}




*Determine death within timeframe of swap (j years)
* mortality_<j>year     : 1 if yr_death is at most j years after the strict exogenous swap
* mortality_<j>year_any : same, measured from yr_swap
* <cause>_<j>y          : the cause-of-death variable when death falls in the window, else 0
* A missing yr_death (or swap year) gives 0, because the comparison is then false.
local causes inf heart o_heart cancer cancer_c cancer_d ext resp digest mental accident suicide homicide
forvalues j = 1/8 {
	gen mortality_`j'year = (yr_death - yr_str_exog_swap <= `j')
	gen mortality_`j'year_any = (yr_death - yr_swap <= `j')
	label variable mortality_`j'year "Mortality within `j' years of swap"

	* mortality causes
	foreach c of local causes {
		gen `c'_`j'y = 0
	}
	foreach c of local causes {
		replace `c'_`j'y = `c' if (yr_death - yr_str_exog_swap <= `j')
	}
}

* age at death by cause
* NOTE(review): digest is in the cause list above but not in this list - confirm this is intended
foreach c in inf heart o_heart cancer cancer_c cancer_d ext resp mental accident suicide homicide {
	gen age_d_`c' = age_death if `c' == 1
}

* Variable labels for the 2-year and 5-year cause-specific mortality
* indicators. Both horizons get the same label text.
foreach h in 2 5 {
	label variable inf_`h'y      "Mortality: Infectious diseases"
	label variable heart_`h'y    "Mortality: heart conditions"
	label variable cancer_`h'y   "Mortality: cancer"
	label variable cancer_c_`h'y "Mortality: malign cancer"
	label variable cancer_d_`h'y "Mortality: neoplasm"
	label variable ext_`h'y      "Mortality: external conditions"
	label variable resp_`h'y     "Mortality: respiratory conditions"
	label variable mental_`h'y   "Mortality: mental health conditions"
	label variable accident_`h'y "Mortality: accident conditions"
}

* must work to be eligible for SL (also no SL for 2020)
* Missing sick-leave days are set to 0 only for people with an income record
* in that year, who have not died within the horizon, and outside 2020.
forvalues k = 2/5 {
	replace sl_days`k' = 0 if sl_days`k' == . & lab_income`k'!=. & mortality_`k'year!=1 & year`k' != 2020
}
* dwoverforK: 1 if woverforK is positive, 0 otherwise.
* The woverforK!=. guard is required: Stata treats missing as larger than any
* number, so without it everyone lacking an income record was coded as 1.
* This matches the "!=." guard used for the other d-indicators in this file.
forvalues k = 2/5 {
	gen dwoverfor`k' = woverfor`k' > 0 if woverfor`k'!=. & mortality_`k'year!=1 & year`k' != 2020
}


* birth outcomes X-years after swap
* Each outcome is copied into <outcome>_5y / <outcome>_2y when the birth year
* is at most 5 / 2 years after the swap, and left missing otherwise.
* NOTE(review): the condition has no lower bound, so births before the swap
* year also qualify - confirm births_rev is already restricted.
foreach outcome in gestweeks lenght bweight apgar1_10 apgar5_10 {
	foreach h in 5 2 {
		gen `outcome'_`h'y = `outcome' if (year_baby - yr_str_exog_swap <= `h')
	}
}

	
* fill in hospitalizations/sl to 0 if missing (5 years post swap)
* The last four names on the third line are the deadly heart conditions.
* The final line holds the medical screenings.
local hospvars npr npr_er npr_noer npr_in npr_out treat charlindex npr_resp npr_cancer_c npr_cancer_d npr_heart npr_inf ///
	npr_disgest npr_muscle npr_skin npr_nervous npr_ear_eye  npr_endocr npr_gen npr_ac_inj ///
	npr_ami npr_chronic_isch npr_hfailure npr_stroke ///
	npr_genex npr_comex npr_reprex  npr_spex npr_emoex npr_famex npr_cancerex npr_chemo

foreach v of local hospvars {
	* s<var>: row sum over years 1-5 (missing years count as 0)
	egen s`v' = rsum(`v'1 `v'2 `v'3 `v'4 `v'5)

	* Years 5, 2 and 1 after the swap: zero-fill and build a 0/1 indicator,
	* but only for swaps from 2003, 2006 and 2007 onwards respectively
	* (that is, 2008 minus the horizon).
	foreach h in 5 2 1 {
		local first_swap = 2008 - `h'
		replace `v'`h' = 0 if `v'`h'==. & yr_str_exog_swap>=`first_swap'
		gen d`v'`h' = (`v'`h' > 0) if yr_str_exog_swap>=`first_swap' & `v'`h'!=.
	}
}

* Year-1 outcomes listed below: for swaps from 2005 onwards a missing value is
* set to 0 and a 0/1 indicator d<var>1 is created.
local gpvars total simple_contact consult sick_visit procedure blood_test labtest small_procedure large_procedure appointment reimburse_def sl sl_p diag_test ///
	prescription referral er life_style_adv reimburse_er cancer diag_L diag_K diag_D diag_P heart_chronic endoc_chronic
foreach v of local gpvars {
	replace `v'1 = 0 if `v'1==. & yr_str_exog_swap>=2005
	gen d`v'1 = (`v'1 > 0) if yr_str_exog_swap>=2005 & `v'1!=.
}

* total year-1 reimbursement = the two reimbursement variables added together
gen total_reimb1 = reimburse_def1 + reimburse_er1

* any baseline chronic condition: row maximum of the baseline dummies, swaps from 2008 on
egen any_chronic0 = rmax(dcancer_base ddigest_chronic_base dheart_chronic_base dendoc_chronic_base dany_cardiogp_base) if yr_str_exog_swap>=2008 


* ---- File 1: patients with a strict exogenous swap -------------------------
* Keep rows that have a swap year and whose death year is not before the swap.
* The full data set is put back afterwards for the second file.
preserve
	keep if yr_str_exog_swap!=. & !(yr_death < yr_str_exog_swap)
	save clean_patientlevel_file_rev3.dta, replace
restore

* ---- File 2: patients with any swap ----------------------------------------
*Drop if no swaps
drop if yr_swap==.
* NOTE(review): unlike the other merges in this file there is no keep(1 3)
* here, so rows found only in patientown_char_any_rev3 are added - confirm
* this is intended.
merge 1:1 lopenr using "patientown_char_any_rev3.dta", nogen
save clean_patientlevel_file_any_rev3.dta, replace



* Extended patient-level file: adds baseline cancer type, year-1 cancer and
* screening outcomes, and year-5 welfare indicators to the strict-swap file.
use clean_patientlevel_file_rev3.dta, clear
merge 1:1 lopenr using "patientown_precancer.dta", keep(1 3) nogen
foreach src in temp_cancer1 khur1_screening khur_screen {
	merge 1:1 lopenr year1 using `src', keep(1 3) nogen
}
merge 1:1 lopenr year5 using welfare_rev5, keep(1 3) nogen


* fill in PC and hospitalizations to 0 if missing 
* same_type_c becomes 1 as soon as one site is flagged both at baseline and in
* year 1. It is 0 for swaps from 2005 on with no such site, missing otherwise.
gen same_type_c = .
foreach site in blood digest musculoskeletal ear_resp eye_neuro skin gland uro_reproduct other {
	replace c_`site'1 = 0 if c_`site'1==. & yr_str_exog_swap>=2005
	replace same_type_c = (c_`site'1 == 1 & c_`site'_base == 1) if yr_str_exog_swap>=2005 & same_type_c!=1
}

* give the screening variables the year-1 suffix used elsewhere
foreach v in general_sy dig_sy resp_sy fem_sy pap_test {
	rename `v' `v'1
}

* zero-fill and 0/1 indicator for swaps from 2005 onwards
foreach v in diag_A general_sy dig_sy resp_sy fem_sy pap_test {
	replace `v'1 = 0 if `v'1==. & yr_str_exog_swap>=2005
	gen d`v'1 = (`v'1 > 0) if yr_str_exog_swap>=2005 & `v'1!=.
}

* the two female-specific measures are set to missing for men
foreach v in fem_sy1 pap_test1 dfem_sy1 dpap_test1 {
	replace `v' = . if male == 1
}
*same_type_c npr_cancerex1 
save patientlevel_rev_extended.dta, replace

